package com.zz.zjc.util;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.util.*;

/**
 * Utility for detecting and masking sensitive words in text.
 *
 * <p>When the class is loaded, every regular file directly under
 * {@code classpath:file/SensitiveWords} is read line by line as UTF-8 and each
 * line becomes one dictionary entry. If the directory is missing or unreadable
 * the dictionary is simply empty (the problem is logged) and every lookup
 * reports "no match".
 *
 * <p>Call {@link #initKeyWord(Set)} or {@link #initKeyWord(List)} to replace
 * the whole dictionary with a custom word list.
 *
 * <p>Only words of at least two characters are ever reported; single-character
 * entries are stored but never match.
 */
public class SensitiveWordFilteringUtil {
	private static final Logger logger = LoggerFactory.getLogger(SensitiveWordFilteringUtil.class);

	/** Match rule: stop at the shortest sensitive word starting at a position. */
	public static final int minMatchTYpe = 1;
	/** Match rule: keep going to the longest sensitive word starting at a position. */
	public static final int maxMatchType = 2;

	/** Shortest match length that counts as a word. */
	private static final int MIN_WORD_LENGTH = 2;

	/**
	 * Root of the current dictionary trie. A new trie is built completely and
	 * then published through this volatile field, so readers never observe a
	 * partially built dictionary. Never null.
	 */
	private static volatile TrieNode sensitiveWordRoot = new TrieNode();

	static {
		Set<String> words = new HashSet<String>();
		try {
			File directory = org.springframework.util.ResourceUtils.getFile("classpath:file/SensitiveWords");
			File[] files = directory.listFiles();
			if (files == null) {
				// listFiles() returns null when the path is not a directory or cannot be read.
				logger.warn("Sensitive word location {} is not a readable directory", directory);
			} else {
				for (File file : files) {
					// Skip sub-directories: opening a stream on one throws and would abort the load.
					if (file.isFile()) {
						loadWordsFromFile(file, words);
					}
				}
			}
		} catch (FileNotFoundException e) {
			logger.error("Sensitive word directory classpath:file/SensitiveWords could not be resolved", e);
		}
		addSensitiveWordToHashMap(words);
	}

	/** One trie node: child nodes keyed by the next character, plus an end-of-word marker. */
	private static final class TrieNode {
		final Map<Character, TrieNode> children = new HashMap<Character, TrieNode>();
		boolean end;
	}

	/**
	 * Reads one word file (UTF-8, one word per line) into {@code words}.
	 * A read failure is logged and the remaining files are still processed.
	 *
	 * @param file  regular file to read
	 * @param words set that receives every line of the file
	 */
	private static void loadWordsFromFile(File file, Set<String> words) {
		try (BufferedReader reader = new BufferedReader(new InputStreamReader(
				new FileInputStream(file), java.nio.charset.StandardCharsets.UTF_8))) {
			String line;
			while ((line = reader.readLine()) != null) {
				words.add(line);
				logger.info("-----------------------------敏感词:{}", line);
			}
		} catch (IOException e) {
			logger.error("Failed to read sensitive word file {}", file, e);
		}
	}

	/**
	 * Replaces the dictionary with the given set of words.
	 *
	 * @param keyWord the new sensitive words; null or empty entries are ignored
	 */
	public static void initKeyWord(Set<String> keyWord) {
		addSensitiveWordToHashMap(keyWord);
	}

	/**
	 * Replaces the dictionary with the given list of words.
	 *
	 * @param keyWord the new sensitive words; null or empty entries are ignored
	 */
	public static void initKeyWord(List<String> keyWord) {
		addSensitiveWordToHashMap(keyWord);
	}

	/**
	 * Builds a fresh trie from {@code keyWord} and makes it the active dictionary.
	 *
	 * @param keyWord words to index; a null collection yields an empty dictionary
	 */
	private static void addSensitiveWordToHashMap(Collection<String> keyWord) {
		TrieNode root = new TrieNode();
		if (keyWord != null) {
			for (String word : keyWord) {
				if (word == null || word.isEmpty()) {
					continue;
				}
				TrieNode node = root;
				for (int i = 0; i < word.length(); i++) {
					Character ch = word.charAt(i);
					TrieNode next = node.children.get(ch);
					if (next == null) {
						next = new TrieNode();
						node.children.put(ch, next);
					}
					node = next;
				}
				node.end = true;
			}
		}
		// Publish only after the trie is complete.
		sensitiveWordRoot = root;
	}

	/**
	 * Tells whether the text contains at least one sensitive word.
	 *
	 * @param txt       text to inspect; null is treated as containing nothing
	 * @param matchType {@link #minMatchTYpe} or {@link #maxMatchType}
	 * @return true if a sensitive word occurs anywhere in {@code txt}
	 */
	public static boolean isContaintSensitiveWord(String txt, int matchType) {
		if (txt == null) {
			return false;
		}
		for (int i = 0; i < txt.length(); i++) {
			if (CheckSensitiveWord(txt, i, matchType) > 0) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Collects the distinct sensitive words that occur in the text, scanning
	 * left to right and resuming after each match.
	 *
	 * @param txt       text to inspect; null yields an empty set
	 * @param matchType {@link #minMatchTYpe} or {@link #maxMatchType}
	 * @return the matched substrings; never null
	 */
	public static Set<String> getSensitiveWord(String txt, int matchType) {
		Set<String> found = new HashSet<String>();
		if (txt == null) {
			return found;
		}
		for (int i = 0; i < txt.length(); i++) {
			int length = CheckSensitiveWord(txt, i, matchType);
			if (length > 0) {
				found.add(txt.substring(i, i + length));
				i = i + length - 1;    // minus one because the loop increments i
			}
		}
		return found;
	}

	/**
	 * Masks every sensitive word found in the text. Each character of a word is
	 * replaced by one copy of {@code replaceChar}. Words and the replacement are
	 * handled literally, so regex metacharacters in either are safe.
	 *
	 * @param txt         text to mask; null is returned unchanged
	 * @param matchType   {@link #minMatchTYpe} or {@link #maxMatchType}
	 * @param replaceChar mask string used per character; null means {@code "*"}
	 * @return the masked text
	 */
	public static String replaceSensitiveWord(String txt, int matchType, String replaceChar) {
		String mask = replaceChar == null ? "*" : replaceChar;
		String result = txt;
		for (String word : getSensitiveWord(txt, matchType)) {
			// String.replace is literal; replaceAll would interpret the word as a regex.
			result = result.replace(word, getReplaceChars(mask, word.length()));
		}
		return result;
	}

	/**
	 * Repeats {@code replaceChar} {@code length} times (at least once).
	 *
	 * @param replaceChar string to repeat
	 * @param length      number of repetitions
	 * @return the repeated string
	 */
	private static String getReplaceChars(String replaceChar, int length) {
		StringBuilder repeated = new StringBuilder(replaceChar);
		for (int i = 1; i < length; i++) {
			repeated.append(replaceChar);
		}
		return repeated.toString();
	}

	/**
	 * Checks whether a sensitive word starts at {@code beginIndex}.
	 *
	 * <p>With {@link #minMatchTYpe} the shortest word (of at least two
	 * characters) is returned; with any other value the longest one is. The
	 * length returned is always that of a complete dictionary word, never of a
	 * longer prefix that merely happens to continue in the trie.
	 *
	 * @param txt        text to inspect; null yields 0
	 * @param beginIndex index of the first character to test
	 * @param matchType  {@link #minMatchTYpe} or {@link #maxMatchType}
	 * @return length of the sensitive word starting at {@code beginIndex}, or 0 if none
	 */
	public static int CheckSensitiveWord(String txt, int beginIndex, int matchType) {
		if (txt == null) {
			return 0;
		}
		TrieNode node = sensitiveWordRoot;
		int matched = 0;       // characters followed in the trie so far
		int wordLength = 0;    // length at the last complete word seen
		for (int i = beginIndex; i < txt.length(); i++) {
			node = node.children.get(txt.charAt(i));
			if (node == null) {
				break;
			}
			matched++;
			if (node.end && matched >= MIN_WORD_LENGTH) {
				wordLength = matched;
				if (minMatchTYpe == matchType) {
					break;
				}
			}
		}
		return wordLength;
	}

}
